Importing data

countMatrix <- ReadDataFrameFromTsv(file.name.path="../data/refSEQ_countMatrix.txt")
## ../data/refSEQ_countMatrix.txt read from disk!
# head(countMatrix)

designMatrix <- ReadDataFrameFromTsv(file.name.path="../design/all_samples_short_names.tsv.csv")
## ../design/all_samples_short_names.tsv.csv read from disk!
# head(designMatrix)

rownames <- as.integer(rownames(countMatrix))

rownames <- rownames[order(rownames)]
rownames.map <- convertGenesViaBiomart(specie="mm10", filter="entrezgene",
                                       filter.values=rownames, c("external_gene_name",
                                                                 "mgi_symbol", "entrezgene"))


noNaCountMatrix <- attachGeneColumnToDf(mainDf=countMatrix,
                                genesMap=rownames.map,
                                rowNamesIdentifier="entrezgene",
                                mapFromIdentifier="entrezgene",
                                mapToIdentifier="external_gene_name")
dim(noNaCountMatrix)
## [1] 20730    42
filteredCountsProp <- filterLowCounts(counts.dataframe=noNaCountMatrix, 
                                    is.normalized=FALSE,
                                    design.dataframe=designMatrix,
                                    cond.col.name="gcondition",
                                    method.type="Proportion")
## features dimensions before normalization: 20730
## Filtering out low count features...
## 13580 features are to be kept for differential expression analysis with filtering method 3

Plot PCA of log unnormalized data

pc1_2 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(filteredCountsProp), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC2", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="Prop-Un-Norm")
## [1] TRUE
pc2_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(filteredCountsProp), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC2", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="Prop-Un-Norm")
## [1] TRUE
pc1_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(filteredCountsProp), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="Prop-Un-Norm")
## [1] TRUE
plotly::subplot(pc1_2, pc2_3, pc1_3, nrows=2, margin = 0.1, titleX=TRUE, titleY=TRUE)

Normalizations

Upper Quartile Normalization

normPropCountsUqua <- NormalizeData(data.to.normalize=filteredCountsProp, 
                                    norm.type="uqua", 
                                    design.matrix=designMatrix)

pc1_2 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normPropCountsUqua), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC2", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA-Norm")
## [1] TRUE
pc2_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normPropCountsUqua), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC2", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA-Norm")
## [1] TRUE
pc1_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normPropCountsUqua), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA-Norm")
## [1] TRUE
plotly::subplot(pc1_2, pc2_3, pc1_3, nrows=2, margin = 0.1, titleX=TRUE, titleY=TRUE)

edgeR DE Analysis

### edgering
source("../R/edgeRFunctions.R")
source("../R/VolcanoPlotFunctions.R")
source("../R/plotUtils.R")
source("../R/MAPlotFunctions.R")
source("../R/pvalHistPlotFunctions.R")
source("../R/plotUtils.R")

cc <- c("WTSD5 - WTHC5", "WTRS2 - WTHC7")

rescList1 <- applyEdgeR(counts=normPropCountsUqua, design.matrix=designMatrix,
                        factors.column="gcondition", 
                        weight.columns=NULL,
                        contrasts=cc, useIntercept=FALSE, p.threshold=1,
                        verbose=TRUE)

WTSD5 - WTHC5

PlotHistPvalPlot(de.results=rescList1[[1]], design.matrix=designMatrix, 
                show.plot.flag=TRUE, plotly.flag=TRUE, 
                prefix.plot=names(rescList1)[1])

SD loading positive controls

sd.pos.ctrls <- ReadDataFrameFromTsv(file.name.path="../data/SD_full_pos_control_genes_BMC_genomics.csv")

sd.pos.ctrls <- sd.pos.ctrls$MGI.Symbol

sd.pos.map <- convertGenesViaBiomart(specie="mm10", filter="mgi_symbol",
                    filter.values=sd.pos.ctrls, c("external_gene_name",
                                "mgi_symbol", "entrezgene"))

sd.pos.map.nna <- sd.pos.map[-which(is.na(sd.pos.map$entrezgene)),]

sd.pos.genes.entrez <- sd.pos.map.nna$entrezgene
## mapping ensembl gene id using biomart
res.o.map <- convertGenesViaBiomart(specie="mm10", filter="entrezgene",
                            filter.values=rownames(rescList1[[1]]), 
                            c("external_gene_name", "mgi_symbol", "entrezgene"))


res.o <- attachGeneColumnToDf(mainDf=rescList1[[1]],
                                genesMap=res.o.map,
                                rowNamesIdentifier="entrezgene",
                                mapFromIdentifier="entrezgene",
                                mapToIdentifier="external_gene_name")


PlotVolcanoPlot(de.results=res.o, counts.dataframe=normPropCountsUqua, design.matrix=designMatrix,
                show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[1], threshold=0.05,
                positive.ctrls.list=sd.pos.genes.entrez)
# PlotMAPlotCounts(de.results=res.o, counts.dataframe=normExprData, design.matrix=desMat,
#                  show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[1], threshold=0.05)

WTRS2 - WTHC7

PlotHistPvalPlot(de.results=rescList1[[2]], design.matrix=designMatrix, 
                show.plot.flag=TRUE, plotly.flag=TRUE, 
                prefix.plot=names(rescList1)[2])

RS loading positive controls

rs2.pos.ctrls <- ReadDataFrameFromTsv(file.name.path="../data/RS2_pos_control_genes_BMC_genomics.csv")

rs2.pos.ctrls <- rs2.pos.ctrls$MGI.Symbol

rs2.pos.map <- convertGenesViaBiomart(specie="mm10", filter="mgi_symbol",
                    filter.values=rs2.pos.ctrls, c("external_gene_name",
                                "mgi_symbol", "entrezgene"))

rs2.pos.map.nna <- rs2.pos.map[-which(is.na(rs2.pos.map$entrezgene)),]

rs2.pos.genes.entrez <- rs2.pos.map.nna$entrezgene
res.o.map <- convertGenesViaBiomart(specie="mm10", filter="entrezgene",
                            filter.values=rownames(rescList1[[2]]), 
                            c("external_gene_name", "mgi_symbol", "entrezgene"))


res.o <- attachGeneColumnToDf(mainDf=rescList1[[1]],
                                genesMap=res.o.map,
                                rowNamesIdentifier="entrezgene",
                                mapFromIdentifier="entrezgene",
                                mapToIdentifier="external_gene_name")


PlotVolcanoPlot(de.results=res.o, counts.dataframe=normPropCountsUqua, design.matrix=designMatrix,
                show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[2], threshold=0.05, 
                positive.ctrls.list=rs2.pos.genes.entrez)
# PlotMAPlotCounts(de.results=res.o, counts.dataframe=normExprData, design.matrix=desMat,
                 # show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[2], threshold=0.05)